Notes & questions


Tried GxE test using robust variance estimates before realizing it didn’t fit any sort of relatedness matrix. Could fit PCs instead?

# Default PLINK module doesn't allow R plugins
module load plink/plink-high-contig-1.90p
R CMD Rserve --no-save
plink --bfile data/derived_data/robust_joint_gxe/test --threads 12 --allow-no-sex --cow --double-id --covar data/derived_data/robust_joint_gxe/test.cov --R source_functions/robust-joint-int-plugin.R

Setup

GEMMA GxE GWAS

gxe_gwas <-
  purrr::map2_dfr(.x = rep(c("temp", "day_length"), each = 4),
                 .y = rep(c(2016, 2017, 2018, 2019), times = 2),
                 ~ read_table2(here::here(glue("data/derived_data/gxe_gwas/{.x}/{.y}/result.assoc.txt"))) %>% 
                                 rename(pos = ps) %>% 
                                 mutate(var = .x,
                                        year = .y,
                                        adj_p = gcif(.,
                                                     adjust_p = TRUE),
                                        neglog10p = -log10(adj_p),
                                        q = qvalue::qvalue(adj_p)$qvalues,
                                        neglog10q = -log10(q)))

Metasoft

  • Input files
purrr::map(.x = c(2016, 2017, 2018, 2019),
             ~ gxe_gwas %>% 
             filter(var == "day_length") %>% 
             filter(year == .x) %>% 
             filter(!is.na(beta)) %>% 
             mutate(b = round(beta, digits = 7),
                    se = round(se, digits = 7)) %>%
             select(rs, !!rlang::sym(glue("b_{.x}")) := b, !!rlang::sym(glue("se_{.x}")) := se)) %>% 
  purrr::reduce(left_join) %>% 
  mutate_if(is.numeric, as.character) %>% 
  write_tsv(here::here("data/derived_data/metasoft/day_length/metasoft_in.day_length.txt"),
            col_names = FALSE)
purrr::map(.x = c(2016, 2017, 2018, 2019),
           ~ gxe_gwas %>%
             filter(var == "temp") %>% 
             filter(year == .x) %>% 
             filter(!is.na(beta)) %>% 
             mutate(b = round(beta, digits = 7),
                    se = round(se, digits = 7)) %>%
             select(rs, !!rlang::sym(glue("b_{.x}")) := b, !!rlang::sym(glue("se_{.x}")) := se)) %>% 
  purrr::reduce(left_join) %>% 
  mutate_if(is.numeric, as.character) %>% 
  write_tsv(here::here("data/derived_data/metasoft/temp/metasoft_in.temp.txt"),
            col_names = FALSE)
  • Output files
gxe_metasoft <- 
  c("temp", "day_length") %>% 
  purrr::set_names() %>% 
  purrr::map_dfr(~ read_table2(here::here(glue("data/derived_data/metasoft/{.x}/metasoft_out.{.x}_adj.txt")),
                               col_types = cols(.default = "d", RSID = "c")) %>% 
                   janitor::clean_names() %>% 
                   select(-pvalue_be, -contains("fe")) %>%
                   left_join(read_table2(here::here(glue("data/derived_data/metasoft/{.x}/metasoft_out.{.x}_adj.txt")),
                                         skip = 1,
                                         col_types = "c---------------dddddddd", 
                                         col_names = c("rsid", "p16", "p17", "p18", "p19", "m16", "m17", "m18", "m19"))) %>% 
                   mutate(chr = as.numeric(str_extract(rsid, "[[:digit:]]{1,2}(?=:)")),
                          pos = as.numeric(str_extract(rsid, "(?<=[[:digit:]]{1,2}:)[[:digit:]]+")),
                          neglog10q_re2 = -log10(qvalue::qvalue(pvalue_re2)$qvalues)),
                 .id = "var")

Step 1. Calculate contemporary group BLUEs to pre-adjust hair shedding scores

Step 2. Fit four year-specific GxE GWAS for each day length and temperature using GEMMA

Diagnostic

Q-Q plots pre- and post-genomic control

qq <-
  gxe_gwas %>% 
  select(var, year, p_wald, adj_p) %>% 
  nest(-var, -year) %>% 
  mutate(pre = purrr::pmap(list(x = data, y = year, z = var),
                            .f = function(x, y, z) {
                              x %>% 
                                pull(p_wald) %>% 
                                ggqq() +
                                labs(title = glue("{y} {z} pre-GC"))
                              }),
         post = purrr::pmap(list(x = data, y = year, z = var),
                            .f = function(x, y, z) {
                              x %>% 
                                pull(adj_p) %>% 
                                ggqq() +
                                labs(title = glue("{y} {z} post-GC"))
                              })) %>% 
  select(-data)
## Warning: All elements of `...` must be named.
## Did you want `data = c(p_wald, adj_p)`?

2016 temperature

qq %>% 
  filter(year == 2016 & var == "temp") %>% 
  pull(pre)
## filter: removed 7 rows (88%), one row remaining
## [[1]]

qq %>% 
  filter(year == 2016 & var == "temp") %>% 
  pull(post)
## filter: removed 7 rows (88%), one row remaining
## [[1]]

2016 day length

qq %>% 
  filter(year == 2016 & var == "day_length") %>% 
  pull(pre)
## filter: removed 7 rows (88%), one row remaining
## [[1]]

qq %>% 
  filter(year == 2016 & var == "day_length") %>% 
  pull(post)
## filter: removed 7 rows (88%), one row remaining
## [[1]]

2017 temperature

qq %>% 
  filter(year == 2017 & var == "temp") %>% 
  pull(pre)
## filter: removed 7 rows (88%), one row remaining
## [[1]]

qq %>% 
  filter(year == 2017 & var == "temp") %>% 
  pull(post)
## filter: removed 7 rows (88%), one row remaining
## [[1]]

2017 day length

qq %>% 
  filter(year == 2017 & var == "day_length") %>% 
  pull(pre)
## filter: removed 7 rows (88%), one row remaining
## [[1]]

qq %>% 
  filter(year == 2017 & var == "day_length") %>% 
  pull(post)
## filter: removed 7 rows (88%), one row remaining
## [[1]]

2018 temperature

qq %>% 
  filter(year == 2018 & var == "temp") %>% 
  pull(pre)
## filter: removed 7 rows (88%), one row remaining
## [[1]]

qq %>% 
  filter(year == 2018 & var == "temp") %>% 
  pull(post)
## filter: removed 7 rows (88%), one row remaining
## [[1]]

2018 day length

qq %>% 
  filter(year == 2018 & var == "day_length") %>% 
  pull(pre)
## filter: removed 7 rows (88%), one row remaining
## [[1]]

qq %>% 
  filter(year == 2018 & var == "day_length") %>% 
  pull(post)
## filter: removed 7 rows (88%), one row remaining
## [[1]]

2019 temperature

qq %>% 
  filter(year == 2019 & var == "temp") %>% 
  pull(pre)
## filter: removed 7 rows (88%), one row remaining
## [[1]]

qq %>% 
  filter(year == 2019 & var == "temp") %>% 
  pull(post)
## filter: removed 7 rows (88%), one row remaining
## [[1]]

2019 day length

qq %>% 
  filter(year == 2019 & var == "day_length") %>% 
  pull(pre)
## filter: removed 7 rows (88%), one row remaining
## [[1]]

qq %>% 
  filter(year == 2019 & var == "day_length") %>% 
  pull(post)
## filter: removed 7 rows (88%), one row remaining
## [[1]]

Inflation factors pre-genomic control

gxe_gwas %>% 
  select(var, year, p_wald, adj_p) %>% 
  group_by(var, year) %>% 
  nest() %>% 
  mutate(`Pre-GC inflation factor` = purrr::map_dbl(.x = data,
                                              ~ .x %>% 
                                                gcif())) %>% 
  rename(`Variable` = var,
         Year = year) %>% 
  select(-data) %>% 
  arrange(Year)
## select: dropped 14 variables (chr, rs, pos, n_miss, allele1, …)
## group_by: 2 grouping variables (var, year)
## rename: renamed one variable (p)
## rename: renamed one variable (p)
## rename: renamed one variable (p)
## rename: renamed one variable (p)
## rename: renamed one variable (p)
## rename: renamed one variable (p)
## rename: renamed one variable (p)
## rename: renamed one variable (p)
## mutate (grouped): new variable 'Pre-GC inflation factor' (double) with 8 unique values and 0% NA
## rename: renamed 2 variables (Variable, Year)
## select: dropped one variable (data)

Results

gxe_gwas %>% 
  filter(var == "day_length") %>% 
  filter(0.1 > adj_p) %>% 
  hair_manhattan(y_var = neglog10p, 
                 y_lab = "-log_10(Adj. p-value)",
                 plot_title = "Day length",
                 color1 = "#b9aa97",
                 color2 = "#7e756d",
                 facet = TRUE,
                 nfacets = 4,
                 desc = year)
## filter: removed 2,872,013 rows (50%), 2,872,013 rows remaining
## filter: removed 2,579,212 rows (90%), 292,801 rows remaining
## group_by: one grouping variable (chr)
## summarise: now 29 rows and 2 columns, ungrouped
## mutate: new variable 'tot' (double) with 29 unique values and 0% NA
## select: dropped one variable (chr_len)
## left_join: added 17 columns (rs, pos, n_miss, allele1, allele0, …)
##            > rows only in x         0
##            > rows only in y  (      0)
##            > matched rows     292,801    (includes duplicates)
##            >                 =========
##            > rows total       292,801
## mutate: new variable 'BPcum' (double) with 244,919 unique values and 0% NA
## ungroup: no grouping variables
## group_by: one grouping variable (chr)
## summarize: now 29 rows and 2 columns, ungrouped
## group_by: one grouping variable (chr)
## summarise: now 29 rows and 2 columns, ungrouped
## mutate: new variable 'tot' (double) with 29 unique values and 0% NA
## select: dropped one variable (chr_len)
## left_join: added 17 columns (rs, pos, n_miss, allele1, allele0, …)
##            > rows only in x         0
##            > rows only in y  (      0)
##            > matched rows     292,801    (includes duplicates)
##            >                 =========
##            > rows total       292,801
## mutate: new variable 'BPcum' (double) with 244,919 unique values and 0% NA
## ungroup: no grouping variables
## mutate: new variable 'chrcolor' (character) with 2 unique values and 0% NA
## rename: renamed one variable (facetvar)

gxe_gwas %>% 
  filter(var == "temp") %>% 
  filter(0.1 > adj_p) %>% 
  hair_manhattan(y_var = neglog10p, 
                 y_lab = "-log_10(Adj. p-value)",
                 plot_title = "Temperature",
                 color1 = "#b9aa97",
                 color2 = "#7e756d",
                 facet = TRUE,
                 nfacets = 4,
                 desc = year)
## filter: removed 2,872,013 rows (50%), 2,872,013 rows remaining
## filter: removed 2,586,541 rows (90%), 285,472 rows remaining
## group_by: one grouping variable (chr)
## summarise: now 29 rows and 2 columns, ungrouped
## mutate: new variable 'tot' (double) with 29 unique values and 0% NA
## select: dropped one variable (chr_len)
## left_join: added 17 columns (rs, pos, n_miss, allele1, allele0, …)
##            > rows only in x         0
##            > rows only in y  (      0)
##            > matched rows     285,472    (includes duplicates)
##            >                 =========
##            > rows total       285,472
## mutate: new variable 'BPcum' (double) with 243,838 unique values and 0% NA
## ungroup: no grouping variables
## group_by: one grouping variable (chr)
## summarize: now 29 rows and 2 columns, ungrouped
## group_by: one grouping variable (chr)
## summarise: now 29 rows and 2 columns, ungrouped
## mutate: new variable 'tot' (double) with 29 unique values and 0% NA
## select: dropped one variable (chr_len)
## left_join: added 17 columns (rs, pos, n_miss, allele1, allele0, …)
##            > rows only in x         0
##            > rows only in y  (      0)
##            > matched rows     285,472    (includes duplicates)
##            >                 =========
##            > rows total       285,472
## mutate: new variable 'BPcum' (double) with 243,838 unique values and 0% NA
## ungroup: no grouping variables
## mutate: new variable 'chrcolor' (character) with 2 unique values and 0% NA
## rename: renamed one variable (facetvar)

Step 3. Meta-analysis of year-specific GxE GWAS using Metasoft

srun java -jar source_functions/Metasoft/Metasoft.jar -pvalue_table source_functions/Metasoft/HanEskinPvalueTable.txt -input data/derived_data/metasoft/day_length/metasoft_in.day_length.txt -output data/derived_data/metasoft/day_length/metasoft_out.day_length.txt -mvalue -mvalue_p_thres 0.01 -log data/derived_data/metasoft/day_length/metasoft.day_length.log
srun java -jar source_functions/Metasoft/Metasoft.jar -pvalue_table source_functions/Metasoft/HanEskinPvalueTable.txt -input data/derived_data/metasoft/day_length/metasoft_in.day_length.txt -output data/derived_data/metasoft/day_length/metasoft_out.day_length_adj.txt -mvalue -mvalue_p_thres 0.01 -lambda_mean 1.401986 -lambda_hetero 0.332300 -log data/derived_data/metasoft/day_length/metasoft.day_length_adj.log
srun --account animalsci java -jar source_functions/Metasoft/Metasoft.jar -pvalue_table source_functions/Metasoft/HanEskinPvalueTable.txt -input data/derived_data/metasoft/temp/metasoft_in.temp.txt -output data/derived_data/metasoft/temp/metasoft_out.temp.txt -mvalue -mvalue_p_thres 0.01 -log data/derived_data/metasoft/temp/metasoft.temp.log
srun --account animalsci java -jar source_functions/Metasoft/Metasoft.jar -pvalue_table source_functions/Metasoft/HanEskinPvalueTable.txt -input data/derived_data/metasoft/temp/metasoft_in.temp.txt -output data/derived_data/metasoft/temp/metasoft_out.temp_adj.txt -mvalue -mvalue_p_thres 0.01 -lambda_mean 1.101779 -lambda_hetero 0.470108 -log data/derived_data/metasoft/temp/metasoft.temp_adj.log

Diagnostic

  • Day length
    • \(\lambda\) inflation factor = 0.9717184
ggqq(pvector = gxe_metasoft %>%
       filter(var == "day_length") %>% 
       filter(!is.na(pvalue_re2)) %>% 
       pull(pvalue_re2)) +
  ggtitle("Metasoft RE2 p-valules (day length)")
## filter: removed 691,934 rows (50%), 691,934 rows remaining
## filter: removed 178 rows (<1%), 691,756 rows remaining

  • Temperature
    • \(\lambda\) inflation factor = 0.9564245
ggqq(pvector = gxe_metasoft %>%
       filter(var == "temp") %>% 
       filter(!is.na(pvalue_re2)) %>% 
       pull(pvalue_re2)) +
  ggtitle("Metasoft RE2 p-valules (temperature)")
## filter: removed 691,934 rows (50%), 691,934 rows remaining
## filter: removed 178 rows (<1%), 691,756 rows remaining

Results

gxe_metasoft %>% 
  filter(var == "day_length") %>% 
  filter(0.5 > pvalue_re2) %>% 
  hair_manhattan(y_var = neglog10q_re2,
                 color1 = "#b9aa97",
                 color2 = "#7e756d",
                 y_lab = latex2exp::TeX("$-log_{10}(RE2 q-value)$"),
                 plot_title = "Day length",
                 sigline = q_fdr1) +
  geom_hline(yintercept = q_fdr5,
             color = "blue")
## filter: removed 691,934 rows (50%), 691,934 rows remaining
## filter: removed 350,045 rows (51%), 341,889 rows remaining
## group_by: one grouping variable (chr)
## summarise: now 29 rows and 2 columns, ungrouped
## mutate: new variable 'tot' (double) with 29 unique values and 0% NA
## select: dropped one variable (chr_len)
## left_join: added 25 columns (var, rsid, number_study, pvalue_re, beta_re, …)
##            > rows only in x         0
##            > rows only in y  (      0)
##            > matched rows     341,889    (includes duplicates)
##            >                 =========
##            > rows total       341,889
## mutate: new variable 'BPcum' (double) with 341,889 unique values and 0% NA
## ungroup: no grouping variables
## group_by: one grouping variable (chr)
## summarize: now 29 rows and 2 columns, ungrouped
## group_by: one grouping variable (chr)
## summarise: now 29 rows and 2 columns, ungrouped
## mutate: new variable 'tot' (double) with 29 unique values and 0% NA
## select: dropped one variable (chr_len)
## left_join: added 25 columns (var, rsid, number_study, pvalue_re, beta_re, …)
##            > rows only in x         0
##            > rows only in y  (      0)
##            > matched rows     341,889    (includes duplicates)
##            >                 =========
##            > rows total       341,889
## mutate: new variable 'BPcum' (double) with 341,889 unique values and 0% NA
## ungroup: no grouping variables
## mutate: new variable 'chrcolor' (character) with 2 unique values and 0% NA

gxe_metasoft %>% 
  filter(var == "temp") %>% 
  filter(0.5 > pvalue_re2) %>% 
  hair_manhattan(y_var = neglog10q_re2,
                 color1 = "#b9aa97",
                 color2 = "#7e756d",
                 y_lab = latex2exp::TeX("$-log_{10}(RE2 q-value)$"),
                 plot_title = "Temperature",
                 sigline = q_fdr1) +
  geom_hline(yintercept = q_fdr5,
             color = "blue")
## filter: removed 691,934 rows (50%), 691,934 rows remaining
## filter: removed 352,399 rows (51%), 339,535 rows remaining
## group_by: one grouping variable (chr)
## summarise: now 29 rows and 2 columns, ungrouped
## mutate: new variable 'tot' (double) with 29 unique values and 0% NA
## select: dropped one variable (chr_len)
## left_join: added 25 columns (var, rsid, number_study, pvalue_re, beta_re, …)
##            > rows only in x         0
##            > rows only in y  (      0)
##            > matched rows     339,535    (includes duplicates)
##            >                 =========
##            > rows total       339,535
## mutate: new variable 'BPcum' (double) with 339,535 unique values and 0% NA
## ungroup: no grouping variables
## group_by: one grouping variable (chr)
## summarize: now 29 rows and 2 columns, ungrouped
## group_by: one grouping variable (chr)
## summarise: now 29 rows and 2 columns, ungrouped
## mutate: new variable 'tot' (double) with 29 unique values and 0% NA
## select: dropped one variable (chr_len)
## left_join: added 25 columns (var, rsid, number_study, pvalue_re, beta_re, …)
##            > rows only in x         0
##            > rows only in y  (      0)
##            > matched rows     339,535    (includes duplicates)
##            >                 =========
##            > rows total       339,535
## mutate: new variable 'BPcum' (double) with 339,535 unique values and 0% NA
## ungroup: no grouping variables
## mutate: new variable 'chrcolor' (character) with 2 unique values and 0% NA